options(repos = c(CRAN = "https://cran.rstudio.com"))
library(tidyverse)
library(sf)
library(maps)
library(plotly)
library(ggplot2)
library(ggmap)
library(ggspatial)
crime_and_incarceration_by_state <- read.csv("FinalProject/data/crime_and_incarceration_by_state.csv")
unemployment_county <- read.csv("FinalProject/data/unemployment_county.csv")
state_polygons <- st_read("FinalProject/data/tl_2019_us_state/tl_2019_us_state.shp")
## Reading layer `tl_2019_us_state' from data source
## `/home/rstudio/FinalProject/data/tl_2019_us_state/tl_2019_us_state.shp'
## using driver `ESRI Shapefile'
## Simple feature collection with 56 features and 14 fields
## Geometry type: MULTIPOLYGON
## Dimension: XY
## Bounding box: xmin: -179.2311 ymin: -14.60181 xmax: 179.8597 ymax: 71.43979
## Geodetic CRS: NAD83
# crime_and_incarceration_by_state
crime_and_incarceration_by_state <- crime_and_incarceration_by_state %>%
filter(!(jurisdiction %in% c("ALASKA", "HAWAII", "FEDERAL")))
crime_and_incarceration_by_state <- crime_and_incarceration_by_state %>%
rename(STUSPS = jurisdiction)
crime_and_incarceration_by_state <- crime_and_incarceration_by_state %>%
rename(Year = year)
crime_and_incarceration_by_state <- crime_and_incarceration_by_state %>%
filter(Year >= 2007 & Year <= 2014)
state_abbreviations <- state.abb
crime_and_incarceration_by_state <- crime_and_incarceration_by_state %>%
mutate(STUSPS = tolower(STUSPS),
STUSPS = state_abbreviations[match(STUSPS, tolower(state.name))])
crime_and_incarceration_by_state <- crime_and_incarceration_by_state %>%
mutate(crime_rate = property_crime_total / state_population)
crime_and_incarceration_by_state <- crime_and_incarceration_by_state %>%
select(STUSPS, Year, crime_rate)
head(crime_and_incarceration_by_state)
## STUSPS Year crime_rate
## 1 AL 2007 0.03977699
## 2 AZ 2007 0.04532562
## 3 AR 2007 0.03955486
## 4 CA 2007 0.03043535
## 5 CO 2007 0.02999230
## 6 CT 2007 0.02470599
# unemployment_county
unemployment_county <- unemployment_county %>%
rename(STUSPS = State)
# Step 2: Remove rows for ALASKA, HAWAII, FEDERAL
unemployment_county <- unemployment_county %>%
filter(STUSPS %in% c("AL", "AZ", "AR", "CA", "CO", "CT", "DE", "FL", "GA", "ID", "IL", "IN", "IA", "KS", "KY", "LA", "ME", "MD", "MA", "MI", "MN", "MS", "MO", "MT", "NE", "NV", "NH", "NJ", "NM", "NY", "NC", "ND", "OH", "OK", "OR", "PA", "RI", "SC", "SD", "TN", "TX", "UT", "VT", "VA", "WA", "WV", "WI", "WY"))
# Step 3: Filter years: 2007-2014
unemployment_county <- unemployment_county %>%
filter(Year >= 2007, Year <= 2014)
unemployment_county <- unemployment_county %>%
group_by(STUSPS, Year) %>%
summarize(Avg_Unemployment_Rate = mean(Unemployment.Rate, na.rm = TRUE))
print(unemployment_county)
## # A tibble: 384 × 3
## # Groups: STUSPS [48]
## STUSPS Year Avg_Unemployment_Rate
## <chr> <int> <dbl>
## 1 AL 2007 4.9
## 2 AL 2008 6.98
## 3 AL 2009 13.2
## 4 AL 2010 12.3
## 5 AL 2011 11.2
## 6 AL 2012 9.25
## 7 AL 2013 8.49
## 8 AL 2014 7.90
## 9 AR 2007 6.05
## 10 AR 2008 6.21
## # ℹ 374 more rows
# state_polygons
state_polygons <- state_polygons %>%
filter(!(STUSPS %in% c("AK", "AS", "MP", "PR", "VI", "HI", "GU")))
state_polygons <- select(state_polygons, REGION, STUSPS, geometry)
str(state_polygons)
## Classes 'sf' and 'data.frame': 49 obs. of 3 variables:
## $ REGION : chr "3" "3" "2" "2" ...
## $ STUSPS : chr "WV" "FL" "IL" "MN" ...
## $ geometry:sfc_MULTIPOLYGON of length 49; first list element: List of 1
## ..$ :List of 1
## .. ..$ : num [1:35204, 1:2] -81.7 -81.7 -81.7 -81.7 -81.7 ...
## ..- attr(*, "class")= chr [1:3] "XY" "MULTIPOLYGON" "sfg"
## - attr(*, "sf_column")= chr "geometry"
## - attr(*, "agr")= Factor w/ 3 levels "constant","aggregate",..: NA NA
## ..- attr(*, "names")= chr [1:2] "REGION" "STUSPS"
##########
combined_data <- right_join(state_polygons, unemployment_county, by = "STUSPS") %>%
right_join(., crime_and_incarceration_by_state, by = c("STUSPS", "Year"))
combined_data <- combined_data %>%
select(REGION, STUSPS,Year, Avg_Unemployment_Rate, crime_rate, geometry)
missing_values <- which(is.na(combined_data))
print(missing_values)
## integer(0)
saveRDS(combined_data, "combined_dataset.rds")
#######
selected_states <- c("IL", "CA", "ID", "IN")
time_series_data <- combined_data %>%
filter(STUSPS %in% selected_states)
unemployment_plot <- plot_ly(
data = time_series_data,
x = ~Year,
y = ~Avg_Unemployment_Rate,
color = ~STUSPS) %>%
add_lines() %>%
layout(title = "Unemployment Rate Over Time",
xaxis = list(title = "Year"),
yaxis = list(title = "Average Unemployment Rate"))
spatial_map <- ggplot() +
geom_sf(data = combined_data %>% filter(Year == 2014),
aes(fill = Avg_Unemployment_Rate),
color = "white",
size = 0.2) +
scale_fill_viridis_c() +
labs(title = "Unemployment Rate Spatial Map (2014)",
fill = "Average Unemployment Rate") +
ggsn::scalebar(data = combined_data, dist = 1000, dist_unit = "km", location = "bottomleft", transform = TRUE, model = "WGS84") +
ggspatial::annotation_north_arrow(location = "br", which_north = "true", style = north_arrow_fancy_orienteering) +
xlab("Longitude") + ylab("Latitude") +
theme_minimal()
spatial_map_region_1 <- ggplot() +
geom_sf(data = combined_data %>% filter(Year == 2014, REGION == 1), # Filter for Region 1
aes(fill = Avg_Unemployment_Rate),
color = "white",
size = 0.2) +
scale_fill_viridis_c() +
labs(title = "Unemployment Rate Spatial Map for Region 1 (2014)",
fill = "Average Unemployment Rate") +
ggsn::scalebar(data = combined_data, dist = 200, dist_unit = "km", location = "bottomleft", transform = TRUE, model = "WGS84") +
ggspatial::annotation_north_arrow(location = "br", which_north = "true", style = north_arrow_fancy_orienteering) +
xlab("Longitude") + ylab("Latitude") +
theme_minimal()
scatter_plot <- plot_ly(
data = combined_data %>% filter(Year == 2014),
x = ~crime_rate,
y = ~Avg_Unemployment_Rate,
color = ~REGION) %>%
add_markers(text = ~paste("State: ", STUSPS)) %>%
layout(title = "Scatter Plot for Unemployment Rate vs Crime Rate (2014)",
xaxis = list(title = "Crime Rate"),
yaxis = list(title = "Average Unemployment Rate"))
spatial_map

spatial_map_region_1

scatter_plot
unemployment_plot